Abundance of Deer in Victoria

Determining abundance of deer species in Victoria using camera trap data

Justin G Cally https://justincally.github.io/blog/ (Arthur Rylah Institute)https://www.ari.vic.gov.au/
2023-07-06

Setup

Load in relevant packages for analysis, additionally, connect to the database. Camera trap and site data is stored on the database.

Show code
library(cmdstanr)
library(dplyr)
library(sf)
library(bayesplot)
library(loo)
library(gt)
library(terra)
library(caret)
library(tidyterra)
library(tidyr)
library(VicmapR)
library(kableExtra)
check_cmdstan_toolchain(fix = TRUE, quiet = TRUE)
register_knitr_engine(override = FALSE)
options(mc.cores=8)

#### Database connection ####
con <- weda::weda_connect(password = keyring::key_get(service = "ari-dev-weda-psql-01",
                                                username = "psql_user"), username = "psql_user")

Custom Functions

Additional functions used in the data preparation, modelling and analysis are available in the /functions directory.

Show code
# Source internal functions
sapply(list.files("functions", full.names = T), source, verbose = F)

Prepare Data

Wrangle and format data for the STAN models for the various species.

Scope of models

Outline which species should be modeled, and which projects to source data from.

Show code
# Species to run model for.
deer_species_all <- c("Cervus unicolor", "Dama dama", "Cervus elaphus", "Axis porcinus")
# Projects to select.
project_short_name <- c("hog_deer_2023", "StatewideDeer")
# Buffer for data extraction.
spatial_buffer <- 1000
# Covariate Rasters
raster_files <- "/Volumes/DeerVic\ Photos/Processed_Rasters"
# raster_files <- "data/prediction_raster"
prediction_raster <- "data/prediction_raster/statewide_raster.tif"
# For the integrated model we place limits on the maximum density of deer to integrate over. In cases where there are no detections on the camera this is limited to 15 per km2. In cases where deer were detetected on the camera this is expanded to 50. We believe this is sufficiently high. Very high values of these will be less efficient. 
n_max_no_det <- 15
n_max_det <- 50

Camera locations

Download the camera locations from the database, this table outlines the locations and the deployment history of the cameras.

Show code
cams_curated <- tbl(con, dbplyr::in_schema("camtrap", "curated_camtrap_operation")) %>%
  dplyr::filter(ProjectShortName %in% !!project_short_name) %>% # Only retrieve cam records for hog deer 2023
  dplyr::collect() %>%
  dplyr::arrange(SiteID) %>%
  sf::st_as_sf(., coords = c("Longitude", "Latitude"), crs = 4283) 

n_site <- nrow(cams_curated)

Formulas for detection and abundance

Here we outline formulas to be used in the models. The formulas account for the various fixed-effect parameters.

Show code
#### Model formulas ####

#### Transect Formula: Survey Only ####
transect_formula <- ~Survey

#### Abundance Formula Options #### 
# Simple Formula
ab_formula_2 <- ~ scale(sqrt(BareSoil)) + scale(sqrt(Nitrogen)) + scale(sqrt(PastureDistance)) + scale(TreeDensity) + scale(sqrt(ForestEdge))

# Simple with Coastal Distance: Hog Deer
ab_formula_3 <- ~ scale(sqrt(BareSoil)) + scale(sqrt(Nitrogen)) + scale(sqrt(PastureDistance)) + scale(TreeDensity) + scale(sqrt(ForestEdge)) + scale(dist_coast) 

#### Detection Formula: Distance-sampling ####
det_formula <- ~ scale(HerbaceousUnderstoryCover) # average detection across all sites #scale(HerbaceousUnderstoryCover)
det_formula2 <- ~1

Create model data

Using the prepare_model_data() function we generate the data for the STAN model. This function will:
1. Download data from the database 2. Format that data to match the distance-sampling bins
3. Organise the counts into sites and group sizes
4. Generate model matrix of the various submodels (distance-sampling, abundance and transect) 5. Generate data for the prediction process 6. Generate data for the random effect (bioregion)
7. Generate data for regional predictions (indexing based on DEECA regions)

Show code
model_data_file <- "data/model_data.rds"
if(!file.exists(model_data_file)) {
evaluate_transects <- c(TRUE, TRUE, TRUE, TRUE)
model_data <- list()
for(i in 1:length(deer_species_all)) {
  
  if(deer_species_all[i] == "Cervus elaphus") {
    det_to_use <- det_formula2
  } else {
    det_to_use <- det_formula
  }
  
    if(deer_species_all[i] == "Axis porcinus") {
    ab_to_use <- ab_formula_3
  } else {
    ab_to_use <- ab_formula_2
  }
  
model_data[[i]] <- prepare_model_data(species = deer_species_all[i],
                                 projects = project_short_name,
                     buffer = spatial_buffer,
                     detection_formula = det_to_use,
                     abundance_formula = ab_to_use,
                     transect_formula = transect_formula,
                     con = con,
                     raster_dir = raster_files,
                     prediction_raster = prediction_raster,
                     n_max_no_det = n_max_no_det,
                     n_max_det = n_max_det,
                     evaltransects = evaluate_transects[i],
                     snapshot_interval = 2, 
                     hs_df = 1,
                     hs_df_global = 1,
                     hs_scale_global = 2/sqrt(nrow(cams_curated)), # ratio of expected non-zero to zero divided by total observation as per brms convention
                     hs_scale_slab = 1,
                     hs_df_slab = 4)

# not used in model but for plots later
model_data[[i]]$raw_data[is.na(model_data[[i]]$raw_data)] <- 0
model_data[[i]]$transects[is.na(model_data[[i]]$transects)] <- 0

}
names(model_data) <- deer_species_all
saveRDS(model_data, model_data_file)
} else {
  model_data <- readRDS(model_data_file)
}

Model Execution

Model settings

Below we list the MCMC setting for our model. We run models on eight parallel chains for 1,000 iterations each (300 warmup and 300 sampling). These setting provide us with 4,000 posterior draws (8 x 500).

Show code
# STAN settings
ni <- 250 # sampling iterations
nw <- 250 # warmup iterations 
nc <- 6 # number of chains

Read in Models

These are the models used in the analysis. The first is an integrated model that requires transect and camera data. The second is a count-only model that just requires the camera data.

Show code
functions {

  /* Efficient computation of the horseshoe prior
   * Args:
   *   zb: standardized population-level coefficients
   *   global: global horseshoe parameters
   *   local: local horseshoe parameters
   *   scale_global: global scale of the horseshoe prior
   *   c2: positive real number for regularization
   * Returns:
   *   population-level coefficients following the horseshoe prior
   */
  vector horseshoe(vector zb, vector[] local, real[] global,
                   real scale_global, real c2) {
    int K = rows(zb);
    vector[K] lambda = local[1] .* sqrt(local[2]);
    vector[K] lambda2 = square(lambda);
    real tau = global[1] * sqrt(global[2]) * scale_global;
    vector[K] lambda_tilde = sqrt(c2 * lambda2 ./ (c2 + tau^2 * lambda2));
    return zb .* lambda_tilde * tau;
  }
}
data {
  int<lower=0> N;                      // number of observations
  real delta;                          // bin width
  int<lower=1> n_site;                 // site
  int<lower=1> n_distance_bins;        // distance bins
  int<lower=1> n_gs;                   // number of group sizes
  vector[n_gs] gs;                     // group sizes
  vector[n_distance_bins] midpts;      // midpt of each interval
  real<lower=1> max_distance;         // truncation distance
  int<lower=1> max_int_dist;          // max distance as an integer
  real<lower=0> theta_frac;           // fraction of camera view
  array[n_site] int effort;           // effort
  array[n_site, n_gs] int n_obs;      //number of observations at each site and for varying group sizes
  array[n_site, n_distance_bins, n_gs] int y; // observations matrix

  // summary of whether species is known to be present at each site
  int<lower = 0, upper = 1> any_seen[n_site];

  // number of surveys at each site
  int<lower = 0> n_survey[n_site];

  // availability prior information
  real<lower=0> bshape;               // availability shape
  real<lower=0> bscale;               // availability scale

  // camera trap detection parameters
  int<lower=0> det_ncb;                 // number of covariates for detection model
  matrix[n_site, det_ncb] det_model_matrix; // detection model matrix
  array[n_gs, n_distance_bins] real pa ; // averaged availability for multiple individuals

  // Abundance/occupancy model matrix
  int<lower = 1> m_psi; // number of covariates
  matrix[n_site, m_psi] X_psi; // model matrix

  // hs params
  real<lower=0> hs_df; // horseshoe prior param: see brms documentation of hs() for explantations of data
  real<lower=0> hs_df_global;
  real<lower=0> hs_df_slab;
  real<lower=0> hs_scale_global;
  real<lower=0> hs_scale_slab;

  //transect level information
  int<lower=1> trans;                  // total number of transects across all sites for all methods
  int<lower = 0, upper = 1> y2[trans]; // transect based binomial detections
  int<lower = 0, upper = trans> start_idx[n_site]; // first "transect" of the site, which is the camera detection
  int<lower = 0, upper = trans> end_idx[n_site]; // last transect of the site
  int<lower=1> trans_det_ncb;           // number of covariates for transect detection model
  matrix[trans, trans_det_ncb] trans_pred_matrix; // transect detection model matrix
  int<lower=1>  n_max[n_site]; // max for poisson RN

  // Prediction data
  int<lower=1> npc;                 // number of prediction grid cells
  matrix[npc, m_psi] X_pred_psi;    // pred matrix
  vector[npc] prop_pred;            // offset for prediction
  // bioregion RE
  int<lower=1> np_bioreg;           // number of bioregions
  int<lower=1> site_bioreg[n_site]; // bioregion of each site
  int<lower=1> pred_bioreg[npc];    // bioregion of each prediction grid
    // DEECA region data
  int<lower=1> np_reg;              // number of regions
  int<lower=1> site_reg[n_site];    // region of each site
  int<lower=1> pred_reg[npc];       // region of each prediction grid
}

transformed data {
  vector[n_site] survey_area;       // total survey area, calculated from snapshot moments and camera field of view
  vector[n_site] cam_seen;          // whether any have been seen at a site on camera
    for(i in 1:n_site) {
      survey_area[i] = theta_frac * effort[i] * pi() * (max_distance/1000)^2;
      cam_seen[i] = sum(n_obs[i,]);
    }

//  for (i in 1:n_distance_bins) {
//    //pi[i] = delta/max_distance; // availability function (line transect)
//    pi[i] = (2 * delta * midpts[i])/max_distance^2; // point transect
//  }

}

parameters {
 // abundance parameters
  simplex[n_gs] eps_ngs; // random variation in group size
  vector[1] beta_intercept; // intercept parameter (not subject to horseshoe)
  // detection parameters
  vector[det_ncb] beta_det; // coefficients for detection submodel
  // transect detection parameters
  vector[trans_det_ncb] beta_trans_det; // coefficients for transect submodel
  // temporal availability parameters
  real<lower=0, upper=1> activ; // average activity of deer, equates to the time they are available for detection
  // bioregion RE
  real<lower=0> bioregion_sd; // standard deviation for bioregion random effect
  vector[np_bioreg] bioregion_raw; // bioregion-level effect
  // local parameters for horseshoe prior
  vector[m_psi-1] zb;
  vector<lower=0>[m_psi-1] hs_local[2];
  // horseshoe shrinkage parameters
  real<lower=0> hs_global[2];
  real<lower=0> hs_c2;
}

transformed parameters {
  // random effects
  vector[np_bioreg] eps_bioregion; // bioregion random effect
  // distance parameters
  array[n_site] real log_sigma; // log of distance sampling detection function sigma
  array[n_site] real sigma; // distance sampling detection function
  array[n_site, n_distance_bins] real p_raw; // detection probability
  array[n_site, n_distance_bins, n_gs] real p_raw_scale; // detection probability for point independence model and accounting for availability
  array[n_site, n_distance_bins, n_gs] real<upper=0> log_p_raw; // log detection probability accounting for availability
  array[n_site, n_gs] real log_p; // log probability with summed exponential for the multinomial logit model
  array[n_site, n_gs] real<lower=0,upper=1> p; // log probability with summed exponential for the multinomial logit model
  // abundance parameters
  array[n_site, n_gs] real<lower=0> lambda; // expected number of groups for the camera surveys
  // activity parameters
  real log_activ = log(activ);
  vector[trans] logit_trans_p = trans_pred_matrix * beta_trans_det; // observation process model
  // lp_site for RN model
  vector[n_site] lp_site; // log-probability for each site based on RN loops
  vector[trans] r = inv_logit(logit_trans_p); // probability of detection for each transect observation/period, the first at each site is the camera-level detection rate
  vector[n_site] log_lambda_psi; //log lambda for estimating number of groups per site
  // hs betas
  vector[m_psi-1] beta_covs; // covariates with horseshow prior
  vector[m_psi] beta_psi; // covariates combined with intercept

  beta_covs = horseshoe(zb, hs_local, hs_global, hs_scale_global, hs_scale_slab^2 * hs_c2); // horseshoe bior placed on beta covariates
  beta_psi = append_row(beta_intercept, beta_covs); // combine intercept and covariates

  for(b in 1:np_bioreg) {
    eps_bioregion[b] = bioregion_sd * bioregion_raw[b]; // generate bioregion random effect
  }

for(n in 1:n_site) {
  log_sigma[n] = det_model_matrix[n,] * beta_det; // calculate sigma from detection function
  sigma[n] = exp(log_sigma[n]);
  for (i in 1:n_distance_bins) {
      // assuming a half-normal detection fn from line
       p_raw[n,i] = exp(-(midpts[i])^2/(2*sigma[n]^2)); // half-normal function (pg 419 of AHM)
      // hazard function
      // p_raw[n,i] = 1 - exp(-(midpts[i]/theta)^(-sigma[n])); // hazard function (pg 507 of AHM)
    for(j in 1:n_gs) {
      p_raw_scale[n,i,j] = p_raw[n,i]*pa[j,i]; //  pr(animal occurs and is detected in bin i)
      log_p_raw[n,i,j] = log(p_raw_scale[n,i,j]);
      }
  }

  log_lambda_psi[n] = X_psi[n,] * beta_psi + eps_bioregion[site_bioreg[n]]; //site-level estimate of abundance of groups

// for various group sizes break down the estimated detection rates and the estimated camera counts
  for(j in 1:n_gs) {
    log_p[n,j] = log_sum_exp(log_p_raw[n,,j]);
    p[n,j] = exp(log_p[n,j]);
    // model site abundance taking into account true abundance, distance sampling (log_p),
    // temporal availability (log_activ), proportional group size (epgs_ngs), and spatial
    // availability/probability of camera-level detection (r[start_idx[n]])
    // offset is the survey area, which includes the snapshot moments and area in camera field of view
    lambda[n,j] = exp(log_lambda_psi[n] + log_p[n,j] + log_activ + log(eps_ngs[j]) + log(r[start_idx[n]])) .* survey_area[n];
  }

// Royle-Nichols implementation in STAN (looping over possible discrete values of N)
// https://discourse.mc-stan.org/t/royle-and-nichols/14150
// https://discourse.mc-stan.org/t/identifiability-across-levels-in-occupancy-model/5340/2
if (n_survey[n] > 0) {
  vector[n_max[n] - any_seen[n] + 1] lp;
// seen
    if(any_seen[n] == 0){ // not seen
      lp[1] = poisson_lpmf(0 | exp(log_lambda_psi[n]));
    }
// not seen
// lp 1 simplification (not necessary)
    else lp[1] = poisson_lpmf(1 | exp(log_lambda_psi[n])) +
    bernoulli_lpmf(y2[start_idx[n]:end_idx[n]] | r[start_idx[n]:end_idx[n]]);
     // loop through possible values for maximum count (km2)
    for (j in 2:(n_max[n] - any_seen[n] + 1)){
      lp[j] = poisson_lpmf(any_seen[n] + j - 1 | exp(log_lambda_psi[n]))
      + bernoulli_lpmf(y2[start_idx[n]:end_idx[n]] | 1 - (1 - r[start_idx[n]:end_idx[n]])^(any_seen[n] + j - 1));
    }
    lp_site[n] = log_sum_exp(lp);
  } else{
    lp_site[n] = 0;
  }

  }
    }

model {
  beta_det ~ normal(0, 4); // prior for sigma
  eps_ngs ~ uniform(0, 1); // prior for group size effect
  beta_intercept ~ normal(-3, 3); // prior for intercept in poisson model
  beta_trans_det ~ normal(0, 3); // beta for transect detection
  activ ~ beta(bshape, bscale);  //informative prior
  bioregion_sd ~ normal(0, 2); // prior for bioregion RE SD
  bioregion_raw ~ normal(0,1); // prior for bioregion RE effect

  for(n in 1:n_site) {
  for(j in 1:n_gs) {
  target += poisson_lpmf(n_obs[n,j] | lambda[n,j]);
  y[n,,j] ~ multinomial_logit(to_vector(log_p_raw[n,,j]));
  }
  target += lp_site[n];
}
  // priors including all constants: poisson
  target += normal_lpdf(zb | 0, 1);
  target += normal_lpdf(hs_local[1] | 0, 1)
    - 101 * log(0.5);
  target += inv_gamma_lpdf(hs_local[2] | 0.5 * hs_df, 0.5 * hs_df);
  target += normal_lpdf(hs_global[1] | 0, 1)
    - 1 * log(0.5);
  target += inv_gamma_lpdf(hs_global[2] | 0.5 * hs_df_global, 0.5 * hs_df_global);
  target += inv_gamma_lpdf(hs_c2 | 0.5 * hs_df_slab, 0.5 * hs_df_slab);
}

generated quantities {
  array[n_site,n_gs] real n_obs_pred;
  array[n_site, n_gs] real n_obs_true;
  array[n_site] real N_site;
  array[n_site] real N_site_pred;
  array[n_site, max_int_dist+1] real DetCurve;
  array[n_site, n_gs] real log_lik1;
  array[n_site, n_gs] real log_lik2;
  array[n_site] real log_lik;
  vector[n_site] Site_lambda;
  vector[n_site] psi;
  array[npc] real pred;
  array[npc] real pred_trunc;
  array[np_reg] real Nhat_reg;
  real av_gs;
  real Nhat;
  real Nhat_trunc;
  int trunc_counter;
  trunc_counter = 0;


for(n in 1:n_site) {
  for(j in 1:n_gs) {
  log_lik1[n,j] = multinomial_logit_lpmf(y[n,,j] | to_vector(log_p_raw[n,,j])); //for loo
  log_lik2[n,j] = poisson_lpmf(n_obs[n,j] | lambda[n,j]); //for loo
  n_obs_true[n, j] = gs[j] * (poisson_log_rng(log_lambda_psi[n] + log(eps_ngs[j])));
  n_obs_pred[n,j] = gs[j] * poisson_rng(lambda[n,j]);
    }
    // get loglik on a site level
    log_lik[n] = log_sum_exp(log_sum_exp(log_sum_exp(log_lik1[n,]), log_sum_exp(log_lik2[n,])), lp_site[n]);
    Site_lambda[n] = exp(log_lambda_psi[n]);
    N_site[n] = sum(n_obs_true[n,]);
    N_site_pred[n] = sum(n_obs_pred[n,]);
    // Occupancy probability transformation
    psi[n] = inv_cloglog(log_lambda_psi[n]);

  // loop over distance bins
  for(j in 0:max_int_dist) { // get DS predictions for distance 0 to max bin distance
    DetCurve[n, j+1] = exp(-(j+0.5)^2/(2*sigma[n]^2)); // half normal
    // DetCurve[n, j+1] =  1 - exp(-(j+0.5/theta)^(-sigma[n])); //hazard rate
    }
}
  av_gs = sum(gs .* eps_ngs);

  for(i in 1:np_reg) Nhat_reg[i] = 0;

for(i in 1:npc) {
  pred[i] = poisson_log_rng(X_pred_psi[i,] * beta_psi + eps_bioregion[pred_bioreg[i]]) * prop_pred[i] * av_gs; //offset
  if(pred[i] > max(N_site)) {
    pred_trunc[i] = max(N_site);
    trunc_counter += 1;
  } else {
    pred_trunc[i] = pred[i];
  } // upper limit placed at highest site estimate
  Nhat_reg[pred_reg[i]] += pred[i];
}
Nhat = sum(pred);
Nhat_trunc = sum(pred_trunc);
}
Show code
functions {

  /* Efficient computation of the horseshoe prior
   * Args:
   *   zb: standardized population-level coefficients
   *   global: global horseshoe parameters
   *   local: local horseshoe parameters
   *   scale_global: global scale of the horseshoe prior
   *   c2: positive real number for regularization
   * Returns:
   *   population-level coefficients following the horseshoe prior
   */
  vector horseshoe(vector zb, vector[] local, real[] global,
                   real scale_global, real c2) {
    int K = rows(zb);
    vector[K] lambda = local[1] .* sqrt(local[2]);
    vector[K] lambda2 = square(lambda);
    real tau = global[1] * sqrt(global[2]) * scale_global;
    vector[K] lambda_tilde = sqrt(c2 * lambda2 ./ (c2 + tau^2 * lambda2));
    return zb .* lambda_tilde * tau;
  }
}
data {
  int<lower=0> N;                      // number of observations
  real delta;                          // bin width
  int<lower=1> n_site;                 // site
  int<lower=1> n_distance_bins;        // distance bins
  int<lower=1> n_gs;                   // number of group sizes
  vector[n_gs] gs;                     // group sizes
  vector[n_distance_bins] midpts;      // midpt of each interval
  real<lower=1> max_distance;         // truncation distance
  int<lower=1> max_int_dist;          // max distance as an integer
  real<lower=0> theta_frac;           // fraction of camera view
  array[n_site] int effort;           // effort
  array[n_site, n_gs] int n_obs;      //number of observations
  array[n_site, n_distance_bins, n_gs] int y; // observations matrix

  // summary of whether species is known to be present at each site
  int<lower = 0, upper = 1> any_seen[n_site];

  // number of surveys at each site
  int<lower = 0> n_survey[n_site];

  // availability prior information
  real<lower=0> bshape;               // availability shape
  real<lower=0> bscale;               // availability scale

  // camera trap detection parameters
  int<lower=0> det_ncb;                 // number of covariates for detection model
  matrix[n_site, det_ncb] det_model_matrix; // detection model matrix
  array[n_gs, n_distance_bins] real pa ; // averaged availability for multiple individuals

  // Abundance/occupancy model matrix
  int<lower = 1> m_psi;
  matrix[n_site, m_psi] X_psi;
  // negbinom scale
  // real reciprocal_phi_scale;
  // hs params
  real<lower=0> hs_df; // horseshoe prior param
  real<lower=0> hs_df_global;
  real<lower=0> hs_df_slab;
  real<lower=0> hs_scale_global;
  real<lower=0> hs_scale_slab;

  //transect level information
  int<lower=1> trans;                  // total number of transects across all sites for all methods
  int<lower = 0, upper = 1> y2[trans]; // transect based binomial detections
  int<lower = 0, upper = trans> start_idx[n_site];
  int<lower = 0, upper = trans> end_idx[n_site];
  int<lower=1> trans_det_ncb;           // number of covariates for transect detection model
  matrix[trans, trans_det_ncb] trans_pred_matrix; // transect detection model matrix
  int<lower=1>  n_max[n_site]; // max for poisson RN

  // Prediction data
  int<lower=1> npc;                 // number of prediction grid cells
  matrix[npc, m_psi] X_pred_psi; // pred matrix
  vector[npc] prop_pred; //offset
  // bioregion RE
  int<lower=1> np_bioreg;
  int<lower=1> site_bioreg[n_site];
  int<lower=1> pred_bioreg[npc];
    // region data
  int<lower=1> np_reg;
  int<lower=1> site_reg[n_site];
  int<lower=1> pred_reg[npc];
}

transformed data {
  // vector[n_distance_bins] pi; // availability for point
  vector[n_site] survey_area;
  vector[n_site] cam_seen;
    for(i in 1:n_site) {
      survey_area[i] = theta_frac * effort[i] * pi() * (max_distance/1000)^2;
      cam_seen[i] = sum(n_obs[i,]);
    }

}

parameters {
 // abundance parameters
  simplex[n_gs] eps_ngs; // random variation in group size
  vector[1] beta_intercept;
  vector[det_ncb] beta_det;
  // transect detection parameters
  vector[trans_det_ncb] beta_trans_det;
  // temporal availability parameters
  real<lower=0, upper=1> activ;
  // bioregion RE
  real<lower=0> bioregion_sd;
  vector[np_bioreg] bioregion_raw;
  // local parameters for horseshoe prior
  vector[m_psi-1] zb;
  vector<lower=0>[m_psi-1] hs_local[2];
  // horseshoe shrinkage parameters
  real<lower=0> hs_global[2];
  real<lower=0> hs_c2;
}

transformed parameters {
  // random effects
  vector[np_bioreg] eps_bioregion; // bioregion random effect
  // distance parameters
  array[n_site] real log_sigma;
  array[n_site] real sigma;
  array[n_site, n_distance_bins] real p_raw; // detection probability
  array[n_site, n_distance_bins, n_gs] real p_raw_scale; // detection probability for point independence model and accounting for availability
  array[n_site, n_distance_bins, n_gs] real<upper=0> log_p_raw; // log detection probability accounting for availability
  array[n_site, n_gs] real log_p; // log probability with summed exponential for the multinomial logit model
  array[n_site, n_gs] real<lower=0,upper=1> p; // log probability with summed exponential for the multinomial logit model
  // abundance parameters
  array[n_site, n_gs] real<lower=0> lambda;
  // activity parameters
  real log_activ = log(activ);
  // lp_site for RN model
  vector[n_site] log_lambda_psi;
  // hs betas
  vector[m_psi-1] beta_covs;
  vector[m_psi] beta_psi;

  beta_covs = horseshoe(zb, hs_local, hs_global, hs_scale_global, hs_scale_slab^2 * hs_c2);
  beta_psi = append_row(beta_intercept, beta_covs);

  for(b in 1:np_bioreg) {
    eps_bioregion[b] = bioregion_sd * bioregion_raw[b];
  }

for(n in 1:n_site) {
  log_sigma[n] = det_model_matrix[n,] * beta_det;
  sigma[n] = exp(log_sigma[n]);
  for (i in 1:n_distance_bins) {
      // assuming a half-normal detection fn from line
       p_raw[n,i] = exp(-(midpts[i])^2/(2*sigma[n]^2)); // half-normal function (pg 419 of AHM)
      // hazard function
      // p_raw[n,i] = 1 - exp(-(midpts[i]/theta)^(-sigma[n])); // hazard function (pg 507 of AHM)
    for(j in 1:n_gs) {
      p_raw_scale[n,i,j] = p_raw[n,i]*pa[j,i]; //  pr(animal occurs and is detected in bin i)
      log_p_raw[n,i,j] = log(p_raw_scale[n,i,j]);
      }
  }

  log_lambda_psi[n] = X_psi[n,] * beta_psi + eps_bioregion[site_bioreg[n]];

  for(j in 1:n_gs) {
    log_p[n,j] = log_sum_exp(log_p_raw[n,,j]);
    p[n,j] = exp(log_p[n,j]);
    // model site abundance
    lambda[n,j] = exp(log_lambda_psi[n] + log_p[n,j] + log_activ + log(eps_ngs[j])) .* survey_area[n];
  }

    }
}

model {
  beta_det ~ normal(0, 4); // prior for sigma
  eps_ngs ~ uniform(0, 1); // prior for group size effect
  beta_intercept ~ normal(-3, 3); // prior for intercept in poisson model
  activ ~ beta(bshape, bscale);  //informative prior
  bioregion_sd ~ normal(0, 2);
  bioregion_raw ~ normal(0,1);

  for(n in 1:n_site) {
  for(j in 1:n_gs) {
  target += poisson_lpmf(n_obs[n,j] | lambda[n,j]);
  y[n,,j] ~ multinomial_logit(to_vector(log_p_raw[n,,j]));
  }
}
  // priors including all constants: poisson
  target += normal_lpdf(zb | 0, 1);
  target += normal_lpdf(hs_local[1] | 0, 1)
    - 101 * log(0.5);
  target += inv_gamma_lpdf(hs_local[2] | 0.5 * hs_df, 0.5 * hs_df);
  target += normal_lpdf(hs_global[1] | 0, 1)
    - 1 * log(0.5);
  target += inv_gamma_lpdf(hs_global[2] | 0.5 * hs_df_global, 0.5 * hs_df_global);
  target += inv_gamma_lpdf(hs_c2 | 0.5 * hs_df_slab, 0.5 * hs_df_slab);
}

generated quantities {
  array[n_site,n_gs] real n_obs_pred;
  array[n_site, n_gs] real n_obs_true;
  array[n_site] real N_site;
  array[n_site] real N_site_pred;
  array[n_site, max_int_dist+1] real DetCurve;
  array[n_site, n_gs] real log_lik1;
  array[n_site, n_gs] real log_lik2;
  array[n_site] real log_lik;
  vector[n_site] Site_lambda;
  vector[n_site] psi;
  array[npc] real pred;
  array[npc] real pred_trunc;
  array[np_reg] real Nhat_reg;
  real av_gs;
  // array[np_reg] real Nhat_reg_design;
  real Nhat;
  real Nhat_trunc;
  int trunc_counter;
  trunc_counter = 0;


for(n in 1:n_site) {
  for(j in 1:n_gs) {
  log_lik1[n,j] = multinomial_logit_lpmf(y[n,,j] | to_vector(log_p_raw[n,,j])); //for loo
  log_lik2[n,j] = poisson_lpmf(n_obs[n,j] | lambda[n,j]); //for loo
  n_obs_true[n, j] = gs[j] * (poisson_log_rng(log_lambda_psi[n] + log(eps_ngs[j])));
  n_obs_pred[n,j] = gs[j] * poisson_rng(lambda[n,j]);
    }
    // get loglik on a site level
    log_lik[n] = log_sum_exp(log_sum_exp(log_lik1[n,]), log_sum_exp(log_lik2[n,]));
    Site_lambda[n] = exp(log_lambda_psi[n]);
    N_site[n] = sum(n_obs_true[n,]);
    N_site_pred[n] = sum(n_obs_pred[n,]);
    // Occupancy probability transformation
    psi[n] = inv_cloglog(log_lambda_psi[n]);

  // loop over distance bins
  for(j in 0:max_int_dist) { // get DS predictions for distance 0 to max bin distance
    DetCurve[n, j+1] = exp(-(j+0.5)^2/(2*sigma[n]^2)); // half normal
    // DetCurve[n, j+1] =  1 - exp(-(j+0.5/theta)^(-sigma[n])); //hazard rate
    }
}
  av_gs = sum(gs .* eps_ngs);

for(i in 1:np_reg) Nhat_reg[i] = 0;

for(i in 1:npc) {
  pred[i] = poisson_log_rng(X_pred_psi[i,] * beta_psi + eps_bioregion[pred_bioreg[i]]) * prop_pred[i] * av_gs; //offset
  if(pred[i] > max(N_site)) {
    pred_trunc[i] = max(N_site);
    trunc_counter += 1;
  } else {
    pred_trunc[i] = pred[i];
  } // upper limit placed at highest site estimate
  Nhat_reg[pred_reg[i]] += pred[i];
}
Nhat = sum(pred);
Nhat_trunc = sum(pred_trunc);
}
Show code
model_poisson <- cmdstan_model(here::here("stan", "count_det_nondet_poisson.stan"))
model_poisson_co <- cmdstan_model(here::here("stan", "count_only_poisson.stan"))

Fit models

We can fit models using cmdstanr. Here we fit models using a poisson distribution. One model for each of the four deer species. To aid convergence adapt_delta is increased from the default value.

Show code
integrated_model <- c(TRUE, TRUE, TRUE, FALSE)
distributions <- c("poisson")
model_fits <- list()

for(j in 1:length(distributions)) {

  for(i in 1:length(deer_species_all)) {

  if(integrated_model[i]) {
    model_to_fit <- get(paste0("model_", distributions[j]))
  } else {
    model_to_fit <- get(paste0("model_", distributions[j], "_co"))
  }

  model_fits[[i]] <- model_to_fit$sample(data = model_data[[i]],
                                         chains = nc,
                                 parallel_chains = nc, 
                                 init = 0.1, 
                                 max_treedepth = 10, 
                                 refresh = 20, 
                                 step_size = 0.01, 
                                 adapt_delta = 0.99,
                                 show_messages = TRUE,
                                 save_warmup = FALSE,
                                 iter_sampling = ni, 
                                 iter_warmup = nw)

  model_fits[[i]]$save_object(paste0("outputs/models/fit_", 
                                     distributions[j], 
                                     "_", 
                                     deer_species_all[i],".rds"))

  }
  
}
Show code
model_fits <- list()

model_dir <- "outputs/models"

for(i in 1:length(deer_species_all)) {
  model_fits[[i]] <- readRDS(paste0(model_dir, "/fit_poisson_", deer_species_all[i], ".rds"))
}

names(model_fits) <-  paste0("fit_poisson_", deer_species_all)

Model Evaluation

Our strategy for model evaluation is to determine if the models of the sparse regression approach (horseshoe-prior regularised model) preform sufficiently well against a null model (no abundance fixed-effect covariates).

LOO (Leave-One-Out Cross-Validation).

We use leave-one-out cross-validation (LOO-CV); a Bayesian model evaluation process (Vehtari, Gelman, and Gabry 2017; Vehtari et al. 2020) to compare the model against a null model. As long as the model with ‘all’ parameters performs better than the null models it is suitable to use it exclusively for all analyses and predictions. Importantly, the cross-validation also gives us the ability to compare the predictive performance against a null model. In doing so we see that the model with abundance covariates performs better than a null model. Noting that the null model will still include a random bioregion effect.

Show code
loo_compares <- list()
loo_compare_tables <- list()
for(i in 2:length(deer_species_all)) {
  which_models <- model_fits[stringr::str_detect(models_to_read, pattern = deer_species_all[i])]
  
  loo_compares[[i]] <- list()
  
  for(j in 1:length(which_models)) {
     loo_compares[[i]][[j]] <- which_models[[j]]$loo(cores = 6)
  }
  
  names(loo_compares[[i]]) <- names(which_models)
  loo_compare_tables[[i]] <- loo::loo_compare(loo_compares[[i]]) %>% as.data.frame()
  loo_compare_tables[[i]]$Species <- deer_species_all[i]
}

names(loo_compare_tables) <- deer_species_all[1:2]

loo_table_all <- bind_rows(loo_compare_tables) %>%
        tibble::rownames_to_column(var = "model_full") %>%
  mutate(model = stringr::str_extract(model_full, "poisson|negbin")) %>%
        dplyr::select(Species, model, everything(), -model_full) %>%
        arrange(Species) %>% 
        as.data.frame()

# Plot loo table
  gt(loo_table_all, 
     groupname_col = "Species") %>% 
    fmt_number(decimals = 2) %>%
    tab_style(locations = cells_row_groups(), style = cell_text(weight = "bold"))

Posterior predictive checks

Posterior predictive checks allow us to compare the observed data to the model-generated data. For each species we undertake posterior predictive checks for summary statistics relating to the number of deer seen on the cameras at each site. Ideally a well-fit model is able to make predictions that match the observed data. The summary statistics we use for the posterior predictive checks are:

  1. 97.5% Quantile of deer seen at a site on a camera
  2. Mean number of deer seen at a site on a camera
  3. Standard deviation of the counts of deer on the cameras
  4. Average (mean) counts at sites in a scatter plot

Sambar Deer Model.

Show code
q95 <- function(x) quantile(x, 0.95, na.rm = T)
# q25 <- function(x) quantile(x, 0.25, na.rm = T)
q75 <- function(x) quantile(x, 0.75, na.rm = T)
# sd_90 <- function (x, na.rm = FALSE) {
#   quants <- quantile(x, c(0.0, 0.95), na.rm = T)
#   x <- x[x < quants[2] & x > quants[1]]
#   sqrt(var(if (is.vector(x) || is.factor(x)) x else as.double(x),
#     na.rm = na.rm))
# }

funs <- c(q95, mean, sd, ppc_scatter_avg)
titles <- c("95% Percentile", "Mean", "SD", "Average Site Counts")

ppc_sambar <- list()

# funs <- c(prop_zero, mean, q90, sd)

for(i in 1:length(funs)) {
ppc_sambar[[i]] <- posterior_checks(model = model_fits[[1]], 
                 model_data = model_data$`Cervus unicolor`, 
                 stat = funs[[i]], 
                 integrated = T, 
                 title = titles[i])
}

cowplot::plot_grid(plotlist = ppc_sambar, labels = "AUTO")
Posterior predictive checks for Sambar Deer

Figure 1: Posterior predictive checks for Sambar Deer

Fallow Deer Model

Show code
ppc_fallow <- list()

for(i in 1:length(funs)) {
ppc_fallow[[i]] <- posterior_checks(model = model_fits[[2]], 
                 model_data = model_data$`Dama dama`, 
                 stat = funs[[i]], 
                 integrated = F, only_det = F,
                 title = titles[i])
}

cowplot::plot_grid(plotlist = ppc_fallow)
Posterior predictive checks for Fallow Deer

Figure 2: Posterior predictive checks for Fallow Deer

Red Deer Model

Show code
funs <- c(max, mean, sd, ppc_scatter_avg)
titles <- c("Maximum", "Mean", "SD", "Average Site Counts")

ppc_red <- list()

for(i in 1:length(funs)) {
ppc_red[[i]] <- posterior_checks(model = model_fits[[3]], 
                 model_data = model_data$`Cervus elaphus`, 
                 stat = funs[[i]], 
                 integrated = T, only_det = F,
                 title = titles[i])
}

cowplot::plot_grid(plotlist = ppc_red)
Posterior predictive checks for Red Deer

Figure 3: Posterior predictive checks for Red Deer

Hog Deer Model

Show code
ppc_hog <- list()

for(i in 1:length(funs)) {
ppc_hog[[i]] <- posterior_checks(model = model_fits[[4]], 
                 model_data = model_data$`Axis porcinus`, 
                 stat = funs[[i]], 
                 integrated = F, only_det = F,
                 title = titles[i])
}

cowplot::plot_grid(plotlist = ppc_hog)
Posterior predictive checks for Hog Deer

Figure 4: Posterior predictive checks for Hog Deer

Model Predictions

Within the STAN model we generate predictions for sampled and unsampled locations. This provides us with site-level abundance estimates as well as estimates across all (unsampled) public forest.

Site-based Predictions

Site-based Prediction Map

Visualisation of point-estimates for the various deer species surveyed for in this study.

Show code
vic_regions <- vicmap_query("open-data-platform:delwp_region") %>%
  collect() %>%
  st_transform(3111) %>%
  st_simplify(dTolerance = 500)


site_preds <- function(model, cams_curated) {
  rn_dens <- model$summary("N_site")
  density_at_sites_rn <- cbind(cams_curated, rn_dens)
  
  return(density_at_sites_rn)
}

site_density_plot <- function(densities, regions, species) {
  densities$density <- cut(densities$mean,
                                   breaks = c(0, 0.1, 1, 3, 5, 10, max(densities$mean)),
                                   labels = c("<0.1", "0.1 - 1", "1-3", "3 - 5", "5 - 10", "10+"), include.lowest = T, right = T)
  
  if(species == "Hog") {
    regions <- regions %>% filter(delwp_region == "GIPPSLAND")
    densities <- densities %>% st_filter(regions %>% st_transform(4283))
  }
  
plot <- ggplot2::ggplot(data = densities) +
  ggplot2::geom_sf(data = regions, alpha = 0.75, fill = "grey80") +
  ggplot2::geom_sf(aes(fill = density, alpha = mean), shape = 21, size = 4) +
  scale_fill_viridis_d(name = "", guide = guide_legend(override.aes = list(size = 6))) +
  scale_alpha_continuous(range = c(0.5,1), guide = "none") +
  labs(title = paste0('Density of ',species ,' Deer'), fill = bquote('Deer per'~km^2)) +
  theme_bw() +
  theme(legend.text = element_text(size = 18), legend.key.size = unit(1, "cm"),
        title = element_text(size = 22))
  
return(plot)
  
}

sambar_preds <- site_preds(model_fits[[1]], cams_curated = cams_curated)
fallow_preds <- site_preds(model_fits[[2]], cams_curated = cams_curated)
red_preds <- site_preds(model_fits[[3]], cams_curated = cams_curated)
hog_preds <- site_preds(model_fits[[4]], cams_curated = cams_curated)

cowplot::plot_grid(site_density_plot(sambar_preds, vic_regions, species = "Sambar"),
site_density_plot(fallow_preds, vic_regions, species = "Fallow"),
site_density_plot(red_preds, vic_regions, species = "Red"),
site_density_plot(hog_preds, vic_regions, species = "Hog"), ncol = 1)
Site-level density estimates for all sites sampled as part of statewide and hog deer surveys

Figure 5: Site-level density estimates for all sites sampled as part of statewide and hog deer surveys

Site-density Summaries

As a sanity-check we compare the average modelled densities at sites with (i) no evidence of deer, (ii) evidence of deer present on transects, and (iii) evidence of deer present on cameras. We expect that average densities are generally higher at sites that detected some form of deer than sites that did not detect any sign of deer. Additionally, we would also expect average densities to be generally higher at sites that had detections on cameras than those with only detections from transects. The table below shows these expectations to be correct for Sambar and partially for Fallow. Red Deer had too few detections for this check to be comprehensive but alongside Hog Deer, Red Deer density was higher at sites with detections on the camera than those where they weren’t seen.

Show code
density_summary_table <- function(preds, model_data, species) {
  
  cam_seen <- as.integer(as.logical(rowSums(model_data$n_obs)))
  
  preds_sum <- preds %>% 
    st_drop_geometry() %>%
    mutate(`Species` = species,
           `CameraDetection` = cam_seen, 
           `AnyDetection` = model_data$any_seen, 
           `Detection` = case_when(CameraDetection == 0 & AnyDetection == 0 ~ "Not seen", 
                                   CameraDetection == 0 & AnyDetection == 1 ~ "Only detected on transects", 
                                   CameraDetection == 1 & AnyDetection == 1 ~ "Seen on cameras")) %>%
    group_by(`Species`, `Detection`) %>%
    summarise(`Number of Sites` = n(),
              `Average Density` = mean(mean)) %>%
    ungroup()
  
  return(preds_sum)
}

density_summary <- bind_rows(
  density_summary_table(sambar_preds, model_data[[1]], species = "Sambar"),
  density_summary_table(fallow_preds, model_data[[2]], species = "Fallow"),
  density_summary_table(red_preds, model_data[[3]], species = "Red"),
  density_summary_table(hog_preds, model_data[[4]], species = "Hog"))

density_summary %>%
  kbl(format = "html", digits = 2) %>%
  kable_styling("striped") %>%
  collapse_rows(1)
Species Detection Number of Sites Average Density
Sambar Not seen 180 0.63
Only detected on transects 33 2.00
Seen on cameras 104 2.84
Fallow Not seen 253 0.81
Only detected on transects 34 0.73
Seen on cameras 30 3.03
Red Not seen 304 0.06
Only detected on transects 1 1.26
Seen on cameras 12 1.12
Hog Not seen 295 0.19
Seen on cameras 22 2.41

Regional and Statewide Abundance

Within our model we calculate abundance/density for deer in each of the 73323 km2 of public land. Based on these spatial predictions we can estimate abundance at a regional level (6 DEECA regions) and across the whole state.

Statewide Maps

Using the model predictions (“pred”) for all suitable public land we generate a raster (1km2 resolution). We save the average spatial estimates under outputs/rasters (tif files) and also provide binned plots (png files) in outputs/plots.

Show code
pred_raster_full <- terra::rast(prediction_raster)

pred_raster <- terra::app(pred_raster_full[[stringr::str_subset(
  stringr::str_remove_all(labels(terms(ab_formula_3)),
                          "scale[(]|[)]|sqrt[(]"), 
  pattern = "[*]", negate = T)]], mean)

mean_raster <- list()
mean_raster_discrete <- list()

for(i in 1:length(model_fits)) {

gp_preds_draws_all <- model_fits[[i]]$draws("pred", format = "matrix")

terra::values(pred_raster)[!is.na(terra::values(pred_raster))] <- apply(gp_preds_draws_all, 
                                                                        2, 
                                                                        mean, 
                                                                        na.rm = T, 
                                                                        trim = 0)

mean_raster[[i]] <- pred_raster
mean_raster_discrete[[i]] <- mean_raster[[i]]
max_pred <- max(values(mean_raster[[i]]), na.rm = T)

values(mean_raster_discrete[[i]]) <- cut(values(mean_raster_discrete[[i]]) , 
                                         breaks = c(0, 0.1,1,3,5,10, max_pred), 
                                         include.lowest = T, right = T,
                                         labels = c("0 - 0.1", 
                                                    "0.1 - 1", 
                                                    "1 - 3", 
                                                    "3 - 5",
                                                    "5 - 10", 
                                                    "10 +"))
}

# combine mean rasters together
combined_raster <- rast(mean_raster)
names(combined_raster) <- c("Average Sambar Deer Density (per km2)", 
                            "Average Fallow Deer Density (per km2)", 
                            "Average Red Deer Density (per km2)", 
                            "Average Hog Deer Density (per km2)")

writeRaster(combined_raster, "outputs/rasters/combined_deer_average_density.tif", overwrite = T)
Show code
# reg <- VicmapR::vicmap_query("open-data-platform:delwp_region") %>%
#     VicmapR::collect() %>%
#   sf::st_transform(3111) %>%
#   sf::st_simplify(dTolerance = 250) 
state <- VicmapR::vicmap_query("open-data-platform:vmlite_victoria_polygon_su5") %>%
  filter(state != "NSW" & state != "SA" & feature_type_code != "sea") %>%
  VicmapR::collect() %>%
  sf::st_transform(3111)

gippsland <- vic_regions %>% filter(delwp_region == "GIPPSLAND")


plot_abundance <- function(raster, state, species, crop = NULL) {
  
  if(!is.null(crop)) {
    raster <- terra::crop(raster, vect(crop), mask = T)
    state <- crop
  }
  
  plot <- ggplot2::ggplot() +
    ggplot2::geom_sf(data = state, alpha = 0.5, linewidth = 0.5, fill = "grey80") + 
    tidyterra::geom_spatraster(data = raster, na.rm = T) + 
    tidyterra::scale_fill_terrain_d(na.translate = FALSE) + 
    ggplot2::labs(fill = bquote('Deer per'~km^2), title = paste0("Average Density of ", species, " on Victorian Public Land")) + 
    ggspatial::annotation_scale() 
  
  return(plot)
  
} 

sambar_abundance_plot <- plot_abundance(mean_raster_discrete[[1]], 
                                        state = state, 
                                        species = "Sambar Deer")
fallow_abundance_plot <- plot_abundance(mean_raster_discrete[[2]], 
                                        state = state, 
                                        species = "Fallow Deer")
red_abundance_plot <- plot_abundance(mean_raster_discrete[[3]], 
                                     state = state, 
                                     species = "Red Deer")
hog_abundance_plot <- plot_abundance(mean_raster_discrete[[4]], 
                                     state = state, 
                                     species = "Hog Deer", 
                                     crop = gippsland)

ggsave(plot = sambar_abundance_plot, filename = "outputs/plots/sambar_abundance_plot.png", 
       width = 2400, height = 1600, units = "px")
ggsave(plot = fallow_abundance_plot, filename = "outputs/plots/fallow_abundance_plot.png", 
       width = 2400, height = 1600, units = "px")
ggsave(plot = red_abundance_plot, filename = "outputs/plots/red_abundance_plot.png", 
       width = 2400, height = 1600, units = "px")
ggsave(plot = hog_abundance_plot, filename = "outputs/plots/hog_abundance_plot.png", 
       width = 2400, height = 1600, units = "px")

cowplot::plot_grid(sambar_abundance_plot, fallow_abundance_plot, red_abundance_plot, hog_abundance_plot, ncol = 1)
Abundance of Sambar, Fallow, Red and Hog Deer across Victoria, dark-grey area reflects area not included in predictions (i.e. not public land)

Figure 6: Abundance of Sambar, Fallow, Red and Hog Deer across Victoria, dark-grey area reflects area not included in predictions (i.e. not public land)

Regional Abundance Estimates

For each DEECA region we provide species-level estimates of abundance with 90 % confidence interval. We also calculate the modle-based average density within each region based on the abundance and the total area of public land within each region.

Show code
reg <- VicmapR::vicmap_query("open-data-platform:delwp_region") %>%
    VicmapR::collect() %>%
    dplyr::group_by(delwp_region) %>%
    dplyr::summarise(geometry = sf::st_combine(geometry)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(delwp_region_fact = as.integer(factor(delwp_region))) 

abundance_table <- function(model, regions, pred_area, caption) {
  regional_abundance <- model$summary("Nhat_reg") %>%
    dplyr::bind_rows(model$summary("Nhat")) %>% 
    dplyr::mutate(variable = c(regions$delwp_region, "TOTAL"), 
                  `Area km2` = c(pred_area, sum(pred_area)), 
                  `Average Density (per km2)` = round(mean/`Area km2`, 2)) %>% 
    dplyr::select(Region = variable, 
                  Mean = mean, 
                  Median = median, 
                  SD = sd, 
                  `5 %` = q5, 
                  `90 %` = q95, 
                  `Area km2`,
                  `Average Density (per km2)`)
  
  kableExtra::kbl(regional_abundance, digits = 1, format = "html", caption = caption) %>%
    kableExtra::kable_styling("striped") %>%
    kableExtra::column_spec(1, bold = TRUE) %>%
    kableExtra::row_spec(6, hline_after = T) %>%
    kableExtra::row_spec(7, background = "#c2a5cf", color = "black", bold = T, hline_after = T)
}

abundance_table(model_fits[[1]], regions = reg, pred_area = table(model_data[[1]]$pred_reg), caption = "Regional estimates of Sambar Deer Density")
Table 1: Regional estimates of Sambar Deer Density
Region Mean Median SD 5 % 90 % Area km2 Average Density (per km2)
BARWON SOUTH WEST 57.3 52.7 20.8 33.0 96.3 4764 0.0
GIPPSLAND 39153.5 38863.9 3863.1 33451.4 46067.7 24922 1.6
GRAMPIANS 758.5 749.0 108.4 594.5 950.8 9515 0.1
HUME 46838.3 46477.6 4657.8 39860.1 55014.5 16672 2.8
LODDON MALLEE 690.8 639.8 213.2 491.0 1065.3 15218 0.0
PORT PHILLIP 4001.2 3975.3 409.4 3387.7 4749.9 2232 1.8
TOTAL 91499.5 90701.7 8991.7 78056.8 107448.2 73323 1.2
Show code
abundance_table(model_fits[[2]], regions = reg, pred_area = table(model_data[[2]]$pred_reg), caption = "Regional estimates of Fallow Deer Density")
Table 1: Regional estimates of Fallow Deer Density
Region Mean Median SD 5 % 90 % Area km2 Average Density (per km2)
BARWON SOUTH WEST 4162.8 4135.1 497.9 3400.0 5019.8 4764 0.9
GIPPSLAND 5290.4 5260.7 575.7 4392.8 6287.0 24922 0.2
GRAMPIANS 2456.4 2439.3 281.2 2022.6 2946.6 9515 0.3
HUME 17017.9 16934.4 1794.7 14275.3 20085.8 16672 1.0
LODDON MALLEE 1194.1 1171.2 205.8 916.3 1547.1 15218 0.1
PORT PHILLIP 2433.9 2418.2 272.8 2015.0 2893.3 2232 1.1
TOTAL 32555.5 32414.7 3364.9 27359.5 38299.6 73323 0.4
Show code
abundance_table(model_fits[[3]], regions = reg, pred_area = table(model_data[[3]]$pred_reg), caption = "Regional estimates of Red Deer Density")
Table 1: Regional estimates of Red Deer Density
Region Mean Median SD 5 % 90 % Area km2 Average Density (per km2)
BARWON SOUTH WEST 2533.5 2441.7 612.7 1715.6 3665.1 4764 0.5
GIPPSLAND 795.0 768.2 195.0 527.8 1148.7 24922 0.0
GRAMPIANS 1965.7 1884.8 495.1 1294.7 2894.7 9515 0.2
HUME 2458.3 2407.3 514.7 1728.9 3377.5 16672 0.1
LODDON MALLEE 45.2 13.6 105.1 1.0 195.7 15218 0.0
PORT PHILLIP 20.3 19.2 8.4 8.7 35.8 2232 0.0
TOTAL 7818.1 7617.7 1581.5 5629.6 10578.1 73323 0.1
Show code
abundance_table(model_fits[[4]], regions = reg, pred_area = table(model_data[[4]]$pred_reg), caption = "Regional estimates of Hog Deer Density")
Table 1: Regional estimates of Hog Deer Density
Region Mean Median SD 5 % 90 % Area km2 Average Density (per km2)
BARWON SOUTH WEST 10.9 6.7 12.8 0.0 35.8 4764 0.0
GIPPSLAND 2416.0 2396.6 267.2 2026.2 2892.3 24922 0.1
GRAMPIANS 1.8 0.0 5.7 0.0 7.8 9515 0.0
HUME 2.3 1.1 3.9 0.0 9.1 16672 0.0
LODDON MALLEE 0.8 0.0 3.9 0.0 3.4 15218 0.0
PORT PHILLIP 364.6 361.8 45.5 295.8 445.2 2232 0.2
TOTAL 2796.5 2774.8 308.0 2346.6 3345.8 73323 0.0

Model Covariates

Our model uses covariates to inform three seperate submodels:

Detection Processes

At a site, there are two processes that may lead us to not record deer on a camera when in reality they occupy the site (whereby the site is the home range area surrounding the camera.) Firstly, deer may be available for detection and enter the sampling area in front of the camera. However, due to a function of their distance from the camera we fail to detect this individual. Secondly, they may occupy a site (known from transect signs) but never enter the camera field of view, in this case we estimate the various detection rates from the various methods. The detection rate of the camera in this method is regarded as the spatial availability of deer for camera trap sampling.

Distance-sampling

Below we show the average detection rate for a given group of deer in front of the camera (up to 12.5m). Detection rastes appear to be higher for Sambar than other species.

Show code
species_names <- c("Sambar Deer", "Fallow Deer", "Red Deer", "Hog Deer")
av_det_rates <- list()
for(i in 1:length(model_fits)) {
det_rates <- model_fits[[i]]$summary("p") %>%
  mutate(var = stringr::str_extract(variable, "(?<=\\[).+?(?=\\])")) %>%
  separate(var, into = c("site", "Group Size"), sep = ",")

av_det_rates[[i]] <- det_rates %>%
  group_by(`Group Size`) %>%
  summarise(mean = mean(mean)) %>%
  ungroup() %>% 
  transmute(`Group Size`, 
            Species = species_names[i], 
            `Average Site Detection Probability` = mean)

}

av_det_rates %>%
  bind_rows() %>%
  arrange(`Group Size`) %>%
  kbl("html", digits = 3, caption = "Average detection rates for the different species and different group sizes") %>%
  kable_styling("striped") %>%
  collapse_rows(1)
Table 2: Average detection rates for the different species and different group sizes
Group Size Species Average Site Detection Probability
1 Sambar Deer 0.451
Fallow Deer 0.413
Red Deer 0.289
Hog Deer 0.281
2 Sambar Deer 0.569
Fallow Deer 0.546
Red Deer 0.423
Hog Deer 0.413
3 Sambar Deer 0.644
Fallow Deer 0.628
Hog Deer 0.502
4 Sambar Deer 0.695
Fallow Deer 0.684
5 Sambar Deer 0.732
Fallow Deer 0.724

For a group size of 1 we can generate detection function plots that show how detection rates fall-off over varying distances.

Note the following data suggests we should test hazard functions for Fallow, Red and Hog.

Show code
site_vars <- dplyr::tbl(con, dbplyr::in_schema("deervic", "curated_site_data")) %>%
    dplyr::filter(SiteID %in% !!c(cams_curated$SiteID, "47191")) %>%
    dplyr::collect() %>%
    dplyr::mutate(HerbaceousUnderstoryCover = NNWHUCover + ENWHUCover,
           SiteID = dplyr::case_when(SiteID == "47191" & CameraID == "HO04101053" ~ "47191A",
                              SiteID == "47191" & CameraID != "HO04101053" ~ "47191B",
                              TRUE ~ SiteID)) %>% # native + exotic herbaceous cover
    dplyr::arrange(SiteID) %>%
    as.data.frame()

  n_distance_bins <- 5
  delta <- 2.5
  midpts <- c(1.25, 3.75, 6.25, 8.75, 11.25)
  max_distance <- 12.5
  
  detection_plot_HN <- list()
  
for(i in 1:length(species_names)) {

det_curve <- model_fits[[i]]$draws("DetCurve", format = "draws_matrix") %>%
  as.data.frame() %>%
  head(250) %>%
  pivot_longer(cols = everything())

det_curve_wr <- det_curve %>%
  mutate(var = stringr::str_extract(name, "(?<=\\[).+?(?=\\])")) %>%
  separate(var, into = c("s", "Distance"), sep = ",")

det_vars_pred <- site_vars %>%
  mutate(s = as.character(1:nrow(.)),
         herbaceouslvl = cut(HerbaceousUnderstoryCover,
                             breaks = c(0, 25, 50, 75, 105), 
                             labels = c("0 - 25%", 
                                        "25 - 50%", 
                                        "50 - 75%", 
                                        "75 - 100%"),
                             include.lowest = T, right = FALSE))

det_curve_sum <- det_curve_wr %>%
  mutate(Distance = as.numeric(Distance)-1) %>%
  left_join(det_vars_pred) %>%
  group_by(herbaceouslvl, Distance) %>%
  summarise(median = quantile(value, 0.5),
            q5 = quantile(value, 0.05),
            q95 = quantile(value, 0.95))


y_combined <- colSums(model_data[[i]]$y[,,1]) %>% # just for group size 1
  as.data.frame() %>%
  `colnames<-`("Count") %>%
  mutate(Distance = midpts,
         Prop = Count/(max(Count)),
         CountS = Count/(2 * 2.5 * Distance)/max_distance^2,
         PropS = CountS/(max(CountS)))

detection_plot_HN[[i]] <- ggplot(aes(x = Distance), data = det_curve_sum) +
  geom_col(aes(y = PropS), fill = "grey70", colour = "grey20", width = 2.5, data = y_combined, alpha = 0.7) +
  # geom_errorbar(aes(ymin = PropSq5, ymax = PropSq95),  data = y_combined) +
  # geom_line(aes(y = HNS)) +
  geom_ribbon(aes(ymin = q5, ymax = q95, fill = herbaceouslvl), alpha = 0.5) +
  geom_line(aes(y = median, colour = herbaceouslvl)) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0,1)) +
  scale_fill_brewer(palette = "Set1") +
  scale_colour_brewer(palette = "Set1") +
  ylab("Detection probability (p)") +
  labs(fill = "Herbaceous\nUnderstorey\nCover", colour = "Herbaceous\nUnderstorey\nCover") +
  theme_classic()

}

cowplot::plot_grid(plotlist = detection_plot_HN, ncol = 2)

Transect-surveys

For Sambar, Fallow and Red Deer we were able to estimate detection rates of the various methods used to detect deer (camera, footprints, pellets, rubbings and wallows).

Show code
det_data_species <- list()
for(j in 1:3) {
  
all_draws <- model_fits[[j]]$draws("beta_trans_det", format = "df")

det_marginal_effects <- list()
det_plot <- list()

obs_vars <- unlist(stringr::str_remove_all(string = labels(terms(transect_formula)), pattern =  "log|scale|\\)|\\("))
obs_cols <- unlist(stringr::str_remove_all(string = colnames(model_data[[j]]$trans_pred_matrix), pattern =  "log|scale|\\)|\\("))
obs_logs <- unlist(stringr::str_detect(string = colnames(model_data[[j]]$trans_pred_matrix), pattern =  "log\\("))

obs_labs <- c("Survey Method")

fac <- c(TRUE, TRUE, TRUE, TRUE, TRUE)
fac2 <- c(TRUE)

det_obs <- model_data[[j]]$transects %>% 
  mutate(Survey = factor(Survey))

params_w_levs <- levels(det_obs$Survey)

for(i in 1:(length(obs_cols))) {
det_marginal_effects[[i]] <- marginal_effects_cmd(all_draws, 
                                              param = "beta_trans_det", 
                                              param_number = i, log = obs_logs[i],
                                     model_data = det_obs, 
                                     model_column = obs_vars[c(1,attr(model_data[[j]]$trans_pred_matrix, "assign")[-1])[i]], 
                                     transition = FALSE) %>%
  mutate(group = param, 
         param = params_w_levs[i], 
         factor = fac[i], 
         variable = as.numeric(variable))

if(fac[i]) {
  det_marginal_effects[[i]] <- det_marginal_effects[[i]]
}

}

marginal_prob <- function(x, pwr = 3) {
  xm <- 1-x
  return(1-(xm^pwr))
}

det_marginal_effects_bind <- bind_rows(det_marginal_effects) %>%
  rowwise() %>%
  mutate(value = case_when(param != "Camera" ~ marginal_prob(value), 
                           TRUE ~ value))

det_marginal_effects_split <- split(det_marginal_effects_bind, det_marginal_effects_bind$group)

for(i in 1:length(det_marginal_effects_split)) {
  
  if(fac2[i]) {
    plot_data <- det_marginal_effects_split[[i]] %>% 
      mutate(variable = param, 
             param = group)
  } else {
    plot_data <- det_marginal_effects_split[[i]]
  }

det_plot[[i]] <- plot_data

}
det_data_species[[j]] <- bind_rows(det_plot) %>%
  mutate(species = species_names[j])
}

data_for_plot <- bind_rows(det_data_species)

marginal_effects_plot_cmd_all(data_for_plot, 
                              factor = TRUE,
                              ylab = "Detection probability") +
  xlab("Survey method") +
  scale_y_continuous(limits = c(0,1)) +
  theme(legend.position = "top") +
  scale_fill_brewer(palette = "Set1") +
  scale_x_discrete(expand = c(0, 0))
Detection Probability for the various methods of survey. Camera trap detection probability is based on average deployment length, while transects are based on 3 independent transect searches

Figure 7: Detection Probability for the various methods of survey. Camera trap detection probability is based on average deployment length, while transects are based on 3 independent transect searches

Abundance Processes

Bioregion

We used a spatially-derived random-effect of the bioregion of the sampled site. This random-effect allows us the ability to make predictions that include the variance associated with this random effect. This random-effect also minimises predictions in areas without deer detections (e.g. the mallee).

Show code
bioregion_contribution <- function(model, data, species) {
  eps_bioregion <- model$draws("eps_bioregion", format = "matrix")
  bioregion_data <- data[["bioreg_sf"]]
  colnames(eps_bioregion) <- bioregion_data$bioregion
  
  plot <- mcmc_areas(eps_bioregion, prob_outer = 0.9) +
    ggtitle(species)
  
  return(plot)
}

bio_plot_data <- list()

for(i in 1:length(species_names)) {
 bio_plot_data[[i]] <- bioregion_contribution(model = model_fits[[i]], 
                         data = model_data[[i]], 
                         species = species_names[i])
} 

cowplot::plot_grid(plotlist = bio_plot_data, ncol = 1)
Influence of the bioregion random effect on abundance (log-scale).

Figure 8: Influence of the bioregion random effect on abundance (log-scale).

Fixed-parameter Effects

Show code
ab_joined_list <- list()
for(j in 1:length(species_names)) {
beta_draws <- model_fits[[j]]$draws("beta_psi", format = "df")

ab_marginal_effects <- list()
ab_plot <- list()

    if(deer_species_all[j] == "Axis porcinus") {
    ab_to_use <- ab_formula_3
  } else {
    ab_to_use <- ab_formula_2
  }

phi_vars <- unlist(stringr::str_remove_all(string = labels(terms(ab_to_use)), pattern =  "log|scale|sqrt|\\)|\\("))
phi_logs <- unlist(stringr::str_detect(string = labels(terms(ab_to_use)), pattern =  "log\\("))
phi_sqrts <- c(0.5,0.5,0.5,1,0.5,1)

phi_labs <- c("Bare Soil (%)", 
              "Nitrogen (%)", 
              "Distance to Pastural Land (m)", 
              "Tree Density (%)", 
              'Length of forest edge in 1km2 (m)', 
              "Geographical Distance to coastline")

fac <- c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)

for(i in 1:length(phi_vars)) {
ab_marginal_effects[[i]] <- marginal_effects_cmd(beta_draws, 
                                              param = "beta_psi", 
                                              param_number = i+1, log = phi_logs[i],
                                     model_data = model_data[[j]][["raw_data"]], 
                                     abundance = TRUE,
                                     pwr = phi_sqrts[i],
                                     model_column = phi_vars[i], 
                                     transition = FALSE) %>%
  mutate(species = species_names[j])

}
ab_joined_list[[j]] <- bind_rows(ab_marginal_effects)
}

all_me_data <- bind_rows(ab_joined_list) %>%
  mutate(param = case_when(param == "BareSoil" ~ "Bare Soil (%)", 
                           param == "Nitrogen" ~ "Nitrogen (%)", 
                           param == "PastureDistance" ~ "Distance to Pastural Land (m)", 
                           param == "TreeDensity" ~ "Tree Density (%)", 
                           param == "ForestEdge" ~ 'Amount of Forest Edge per km2 (m)', 
                           param == "dist_coast" ~ "Geographical Distance to Coast"))
  

marginal_effects_plot_cmd_all(all_me_data, 
                          col = "DarkGreen", 
                          factor = FALSE,
                          ylab = "Contribution to Abundance (log-scale)") +
      ggplot2::facet_grid(species~param, scales = "free") +
  theme_bw() +
  xlab("Covariate Value")
Marginal response curves for the various fixed-effect parameters used in the model.

Figure 9: Marginal response curves for the various fixed-effect parameters used in the model.

Vehtari, Aki, Jonah Gabry, Mans Magnusson, Yuling Yao, Paul-Christian Bürkner, Topi Paananen, and Andrew Gelman. 2020. “Loo: Efficient Leave-One-Out Cross-Validation and WAIC for Bayesian Models.” https://mc-stan.org/loo/.
Vehtari, Aki, Andrew Gelman, and Jonah Gabry. 2017. “Practical Bayesian Model Evaluation Using Leave-One-Out Cross-Validation and WAIC.” Statistics and Computing 27 (5): 1413–32.

References